Metlin hosts a large amount of useful data, but it is only reachable through an interactive JavaScript interface on its website. The functions here retrieve arbitrary amounts of that data from the website in a scripted fashion.

Two notes. First, I’m pretty sure this would not make Metlin happy. I’m not sure it’s technically illegal, but I’d rather not have to figure that out. Second, Metlin has rate limits in place that, if exceeded, will block an IP address for about a day. I triggered this while trying to pull down the data for 20 amino acids, 5 times, within a minute, so I’m not sure how generous or aggressive the limits actually are.

library(getMetlin)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Begin by searching Metlin for records, either by name or (as here) by mass;
# only the first few matches are shown
getMetlinMz(117.078979) %>%
  head()
## [1] "Metlin returned 145 compound(s) between 117.078686302552 and 117.079271697448 m/z with 1 unique formula(s): C5H11NO2"
## [1] "Of those, 5 have experimental MS/MS data: L-Valine, Betaine, N-Methyl-a-aminoisobutyric acid, 5-Aminopentanoic acid, L-Norvaline"
##   cmpd_id    exact_mass                       cmpd_name  formula      CAS
## 1      35 117.078978601                        L-Valine C5H11NO2  72-18-4
## 2     287 117.078978601                         Betaine C5H11NO2 590-46-5
## 3    6508    117.078979 N-Methyl-a-aminoisobutyric acid C5H11NO2         
## 4    6762 117.078978601                 isoamyl nitrite C5H11NO2         
## 5    6902 117.078978601           5-Aminopentanoic acid C5H11NO2         
## 6   35940    117.078965          4R-aminopentanoic acid C5H11NO2         
##     KEGG         MSMS Structure
## 1 C00183 experimental          
## 2 C00719 experimental          
## 3        experimental          
## 4 C07457           NO          
## 5 C00431 experimental          
## 6                  NO
# Search by compound name and show the first few matches
getMetlinName("Betaine") %>%
  head()
## [1] "Metlin returned 35 compound(s) with name Betaine  with 33 unique formula(s): C11H21NO9, C12H17NO3, C12H17NO5, C16H12ClNO3, C16H15NO3S, C17H15NO3, C18H15NO5, C18H34N2O3, C26H53NO2, C42H81NO7, C42H81NO8, C44H81NO7, C46H79NO8, C48H83NO7, C4H9O2Se, C5H10N2S2, C5H11AsO2, C5H11NO, C5H11NO2, C5H11O2Se, C5H12ClNO, C5H12ClNO2, C5H12NO, C5H13NO3, C6H13AsO3, C6H13NO2, C6H14NO2, C7H13NO2, C7H14Cl3NO4, C7H15NO2, C7H15NO3, C9H20NO2, C9H22INO2"
## [1] "Of those, 5 have experimental MS/MS data: L-Carnitine, Betaine aldehyde, Betaine, Arsenobetaine, gamma-Butyrobetaine Ethyl Ester Chloride"
##   cmpd_id    exact_mass                       cmpd_name   formula
## 1      52 161.105193351                     L-Carnitine  C7H15NO3
## 2     278 101.084063979                Betaine aldehyde   C5H11NO
## 3     287 117.078978601                         Betaine  C5H11NO2
## 4     966 145.110278729 4-Trimethylammoniobutanoic acid  C7H15NO2
## 5    7089 143.094628665                 Proline betaine  C7H13NO2
## 6   46617 711.601303951                 DGTS(16:0/16:0) C42H81NO7
##         CAS   KEGG         MSMS Structure
## 1  541-15-1 C00318 experimental          
## 2 7418-61-3 C00576 experimental          
## 3  590-46-5 C00719 experimental          
## 4  407-64-7 C01181                       
## 5           C10172           NO          
## 6                            NO
# Next, pull the MS/MS data for a record that has some: repeat the mass
# search, keep the hits flagged as experimental, and take the first one
ms2_candidates <- filter(getMetlinMz(117.078979), MSMS == "experimental")
sample_ms2_cmpd <- slice(ms2_candidates, 1)
## [1] "Metlin returned 145 compound(s) between 117.078686302552 and 117.079271697448 m/z with 1 unique formula(s): C5H11NO2"
## [1] "Of those, 5 have experimental MS/MS data: L-Valine, Betaine, N-Methyl-a-aminoisobutyric acid, 5-Aminopentanoic acid, L-Norvaline"
# Go through character before numeric so that, if cmpd_id is a factor, we get
# the printed ID rather than the internal level code
metlin_id <- sample_ms2_cmpd$cmpd_id %>%
  as.character() %>%
  as.numeric()

# Fetch the MS2 records for that compound ID
sample_ms2 <- getMetlinMS2(metlin_id)
## [1] "Metlin had 6 MS2 records for this compound, with collision energies of +0, +10, +20, +40, -0, -10"
# Peek at the first few fragment rows
sample_ms2 %>%
  head()
##   polarity adduct voltage frag_mass   frag_int
## 1        +  [M+H]       0 118.18773   0.518033
## 2        +  [M+H]       0 118.08622 100.000000
## 3        +  [M+H]       0  72.08146  61.057000
## 4        +  [M+H]       0  55.05462   1.623280
## 5        +  [M+H]      10 118.08630   4.202360
## 6        +  [M+H]      10  72.16100   0.658412
# Plot it: the compound name on top, then one stacked stick-spectrum panel per
# voltage (positive polarity only), all sharing a single x-axis at the bottom.
pos_ms2 <- subset(sample_ms2, polarity == "+")
split_volt_pos_ms2 <- split(pos_ms2, pos_ms2$voltage)
n_panels <- length(split_volt_pos_ms2)
max_frag_mass <- max(sample_ms2$frag_mass)

# Figure 1 owns the first and last layout rows (title above, room for the
# shared x-axis labels below); every spectrum panel is two rows tall.
# seq_len() keeps the matrix valid even when there are no positive-mode panels,
# where 2:(n + 1) would count backwards and produce c(2, 1).
layout(matrix(c(1, rep(seq_len(n_panels) + 1, each = 2), 1), ncol = 1))

# Keep the previous margins so they can be restored once plotting is done
old_par <- par(mar = c(0.1, 4.1, 0.1, 0.1))

plot.new()
text(x = 0.5, y = 1, labels = sample_ms2_cmpd$cmpd_name, cex = 3)

for (volt_ms2 in split_volt_pos_ms2) {
  # Empty frame first (type = "n"), then one vertical stick per fragment
  plot(volt_ms2$frag_mass, volt_ms2$frag_int, xlab = "",
       ylab = paste("Voltage", unique(volt_ms2$voltage)),
       xlim = c(0, max_frag_mass),
       xaxt = "n", yaxt = "n", type = "n", ylim = c(0, 120))
  segments(x0 = volt_ms2$frag_mass, x1 = volt_ms2$frag_mass,
           y0 = 0, y1 = volt_ms2$frag_int)
  axis(side = 2, at = c(0, 50, 100), labels = c(0, 50, 100))
}

# Only the last panel drawn gets an x-axis
axis(side = 1)

# Hand the graphics device back as we found it, so later base plots are not
# drawn with these tiny margins into the leftover panel grid
par(old_par)
layout(1)

library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang

# The same positive-polarity spectra as a ggplot: one facet per voltage, each
# fragment drawn as a vertical stick from its intensity down to zero.
# Note: the earlier `label = frag_mass` argument to ggplot() was dropped. It
# sat outside aes(), so ggplot() ignored it and it never reached the plot.
gp <- sample_ms2 %>%
  filter(polarity == "+") %>%
  # filter(voltage == 20) %>%   # uncomment to show a single voltage only
  ggplot() +
  geom_segment(
    aes(x = frag_mass, xend = frag_mass, y = frag_int, yend = 0)
  ) +
  # geom_hline(yintercept = 0) +   # uncomment for an explicit baseline
  facet_wrap(~voltage, ncol = 1) +
  theme_bw() +
  # Scale x to the full data set (both polarities), matching the base plot
  xlim(0, max(sample_ms2$frag_mass))
gp

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Convert the ggplot into an interactive plotly widget whose hover tooltip
# shows the y value followed by the x value
ggplotly(p = gp, tooltip = c("y", "x"))